Introduction

In this project we are asked to use a data set that contains x, y and z coordinates and to use these values for gesture recognition. We will take the acceleration values and, with some data manipulation, calculate speed and location values. These values will help us find the locations of all points in the classes, so we will be able to create our figure. For the first part of this project, we will use a 3D scatter plot for our visualization. By using the location values for all axes, we will try to create a gesture shape and try to make our figures similar to the desired shapes. For the second part of this project, we will use two alternative representation methods to help us understand the results. These methods are individual model representation and class based representation.

library(data.table,quietly = TRUE,warn.conflicts = FALSE)
library(ggplot2,quietly = TRUE,warn.conflicts = FALSE)
library(repr,quietly = TRUE,warn.conflicts = FALSE)
library(rpart,quietly = TRUE,warn.conflicts = FALSE)
library(rattle,quietly = TRUE,warn.conflicts = FALSE)
## Rattle: A free graphical interface for data science with R.
## Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(TSrepr,quietly = TRUE,warn.conflicts = FALSE)
library(zoo,quietly = TRUE,warn.conflicts = FALSE)
library(plotly,quietly = TRUE,warn.conflicts = FALSE)
library(knitr,quietly = TRUE,warn.conflicts = FALSE)

We read our data with fread function.

# Directory holding the uWave gesture training files; change this one value to
# run the analysis from another location.
data_dir <- "D:/Boğaziçi/İe48b/hw1"

# One file per accelerometer axis, each read into its own data.table.
x <- fread(file.path(data_dir, "uWaveGestureLibrary_X_TRAIN"))
y <- fread(file.path(data_dir, "uWaveGestureLibrary_Y_TRAIN"))
z <- fread(file.path(data_dir, "uWaveGestureLibrary_Z_TRAIN"))

Data manipulation

We need to change the names of our columns in order to understand what they represent more easily. So we name our first column as class for x, y and z axes.

# X axis: label the first column as the gesture class and sort the rows by it.
setnames(x, old = "V1", new = "class")
train_x <- x[order(class)]
# Store the class as text and number the sorted rows; `instances` is the id
# that identifies one series from here on.
train_x[, class := as.character(class)]
train_x[, instances := 1:.N]
head(train_x[, 1:5])
##    class         V2         V3         V4         V5
## 1:     1 -0.7914472 -0.7914472 -0.7958727 -0.8100650
## 2:     1 -0.4496024 -0.4496024 -0.4496024 -0.4496024
## 3:     1 -0.1874689 -0.1874689 -0.1874689 -0.1874689
## 4:     1  0.3014817  0.3014817  0.3014817  0.3014817
## 5:     1  0.1632022  0.1632022  0.1632022  0.1632022
## 6:     1 -0.9319900 -0.9319900 -0.9319900 -0.9319900
# Y axis: label the first column as the gesture class and sort the rows by it.
setnames(y, old = "V1", new = "class")
train_y <- y[order(class)]
# Store the class as text and number the sorted rows; `instances` is the id
# that identifies one series from here on.
train_y[, class := as.character(class)]
train_y[, instances := 1:.N]
head(train_y[, 1:5])
##    class         V2         V3         V4         V5
## 1:     1 -1.9599838 -1.9599838 -1.9566005 -1.9457505
## 2:     1 -1.9712004 -1.9712004 -1.9712004 -1.9712004
## 3:     1 -1.2364488 -1.2364488 -1.2364488 -1.2364488
## 4:     1 -1.4935131 -1.4935131 -1.4935131 -1.4935131
## 5:     1 -0.6158814 -0.6158814 -0.6158814 -0.6158814
## 6:     1 -1.6753297 -1.6753297 -1.6753297 -1.6753297
# Z axis: label the first column as the gesture class and sort the rows by it.
setnames(z, old = "V1", new = "class")
train_z <- z[order(class)]
# Store the class as text and number the sorted rows; `instances` is the id
# that identifies one series from here on.
train_z[, class := as.character(class)]
train_z[, instances := 1:.N]
head(train_z[, 1:5])
##    class         V2         V3         V4         V5
## 1:     1 -0.2490781 -0.2490781 -0.2514390 -0.2590101
## 2:     1 -1.2897498 -1.2897498 -1.2897498 -1.2897498
## 3:     1 -0.8108461 -0.8108461 -0.8108461 -0.8108461
## 4:     1 -1.1472344 -1.1472344 -1.1472344 -1.1472344
## 5:     1 -1.3271478 -1.3271478 -1.3271478 -1.3271478
## 6:     1 -1.8033172 -1.8033172 -1.8033172 -1.8033172

We melt our data into long format and add a time column. The instances column created above identifies the series that each value belongs to. We call the melted value acceleration.

# Reshape the X axis to long format: one row per (series, time step).
long_x <- melt(
  train_x,
  id.vars = c("instances", "class"),
  value.name = "acc_x"
)
# Column names are V2, V3, ...; strip the letter and subtract 1 so time starts at 1.
long_x[, time := as.numeric(gsub("\\D", "", variable)) - 1]
# Keep only the needed columns, ordered by series and then by time.
long_x <- long_x[, .(instances, class, time, acc_x)][order(instances, time)]
head(long_x)
##    instances class time      acc_x
## 1:         1     1    1 -0.7914472
## 2:         1     1    2 -0.7914472
## 3:         1     1    3 -0.7958727
## 4:         1     1    4 -0.8100650
## 5:         1     1    5 -0.8492300
## 6:         1     1    6 -0.9034648
# Reshape the Y axis to long format: one row per (series, time step).
# The value column is named acc_y so that its label matches the axis it holds.
long_y <- melt(
  train_y,
  id.vars = c("instances", "class"),
  value.name = "acc_y"
)
# Column names are V2, V3, ...; strip the letter and subtract 1 so time starts at 1.
long_y[, time := as.numeric(gsub("\\D", "", variable)) - 1]
long_y <- long_y[, list(instances, class, time, acc_y)]
long_y <- long_y[order(instances, time)]
head(long_y)
##    instances class time     acc_y
## 1:         1     1    1 -1.959984
## 2:         1     1    2 -1.959984
## 3:         1     1    3 -1.956600
## 4:         1     1    4 -1.945750
## 5:         1     1    5 -1.915809
## 6:         1     1    6 -1.874347
# Reshape the Z axis to long format: one row per (series, time step).
long_z <- melt(
  train_z,
  id.vars = c("instances", "class"),
  value.name = "acc_z"
)
# Column names are V2, V3, ...; strip the letter and subtract 1 so time starts at 1.
long_z[, time := as.numeric(gsub("\\D", "", variable)) - 1]
# Keep only the needed columns, ordered by series and then by time.
long_z <- long_z[, .(instances, class, time, acc_z)][order(instances, time)]
head(long_z)
##    instances class time      acc_z
## 1:         1     1    1 -0.2490781
## 2:         1     1    2 -0.2490781
## 3:         1     1    3 -0.2514390
## 4:         1     1    4 -0.2590101
## 5:         1     1    5 -0.2799033
## 6:         1     1    6 -0.3088358

We find speed and location values by using cumulative sum approach. By using acceleration values we calculate speed values and by using speed values we calculate location values.

# Work on a copy: a plain assignment would only alias long_x, and every `:=`
# below would then add its columns to long_x as well.
merged <- copy(long_x)

# The y and z accelerations are attached by row position, so make sure the
# three long tables line up row by row before combining them.
stopifnot(
  identical(long_y$instances, merged$instances),
  identical(long_y$time, merged$time),
  identical(long_z$instances, merged$instances),
  identical(long_z$time, merged$time)
)

# Per series, a cumulative sum of acceleration gives speed and a cumulative
# sum of speed gives location.
merged[, speed_x := cumsum(acc_x), by = instances]
merged[, location_x := cumsum(speed_x), by = instances]

# Column 4 of each long table is its acceleration column; `[[` extracts it as
# a plain vector.
merged[, acc_y := long_y[[4]]]
merged[, speed_y := cumsum(acc_y), by = instances]
merged[, location_y := cumsum(speed_y), by = instances]

merged[, acc_z := long_z[[4]]]
merged[, speed_z := cumsum(acc_z), by = instances]
merged[, location_z := cumsum(speed_z), by = instances]

head(merged)
##    instances class time      acc_x    speed_x  location_x     acc_y    speed_y
## 1:         1     1    1 -0.7914472 -0.7914472  -0.7914472 -1.959984  -1.959984
## 2:         1     1    2 -0.7914472 -1.5828944  -2.3743415 -1.959984  -3.919968
## 3:         1     1    3 -0.7958727 -2.3787671  -4.7531086 -1.956600  -5.876568
## 4:         1     1    4 -0.8100650 -3.1888321  -7.9419407 -1.945750  -7.822319
## 5:         1     1    5 -0.8492300 -4.0380621 -11.9800028 -1.915809  -9.738128
## 6:         1     1    6 -0.9034648 -4.9415269 -16.9215298 -1.874347 -11.612474
##    location_y      acc_z    speed_z location_z
## 1:  -1.959984 -0.2490781 -0.2490781 -0.2490781
## 2:  -5.879951 -0.2490781 -0.4981562 -0.7472342
## 3: -11.756519 -0.2514390 -0.7495951 -1.4968294
## 4: -19.578838 -0.2590101 -1.0086052 -2.5054346
## 5: -29.316966 -0.2799033 -1.2885085 -3.7939431
## 6: -40.929440 -0.3088358 -1.5973444 -5.3912875

Part 1 Gesture Recognition

We calculated our location values above. By using these values, we will create x,y and z variables. By using plot_ly function we will draw the graphs of our 3D models for all 8 classes.

Class 1 Instance 111

# Class 1, instance 111: rows of one series, in time order.
data1 <- merged[instances == 111 & class == "1"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, first image has a sharp corner. In our plot we don’t have the sharp corner but the shape is similar.

Class 2 Instance 222

# Class 2, instance 222: rows of one series, in time order.
data1 <- merged[instances == 222 & class == "2"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, second image has sharp corners and it looks like it returns to its start point. In our plot the image is following the same pattern but the end did not return to where it started.

Class 3 Instance 333

# Class 3, instance 333: rows of one series, in time order.
data1 <- merged[instances == 333 & class == "3"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, third image is a straight line. In our plot the image is like a straight line but at the end of it, it has a little curve.

Class 4 Instance 444

# Class 4, instance 444: rows of one series, in time order.
data1 <- merged[instances == 444 & class == "4"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, fourth image is a straight line. In our plot the image is like a straight line at the beginning but later, it becomes a curve.

Class 5 Instance 555

# Class 5, instance 555: rows of one series, in time order.
data1 <- merged[instances == 555 & class == "5"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, fifth image is a straight line. In our plot the image is like a straight line at the beginning but later, it becomes a curve.

Class 6 Instance 666

# Class 6, instance 666: rows of one series, in time order.
data1 <- merged[instances == 666 & class == "6"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, sixth image is a straight line. In our plot the image is like a straight line at the beginning but later, it has some oscillations in the end.

Class 7 Instance 777

# Class 7, instance 777: rows of one series, in time order.
data1 <- merged[instances == 777 & class == "7"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, seventh image is a curve and it looks like it returns to its start point. In our plot the image is following the same pattern but the end did not return to where it started.

Class 8 Instance 888

# Class 8, instance 888: rows of one series, in time order.
data1 <- merged[instances == 888 & class == "8"]

# Plot the location columns directly (no global x/y/z copies, which would
# overwrite the raw input tables) and colour the points by time step so the
# direction of the movement is visible for any series length.
fig <- plot_ly(
  data1,
  x = ~location_x, y = ~location_y, z = ~location_z,
  color = ~time,
  type = "scatter3d", mode = "markers", opacity = 1
)

fig

In gesture vocabulary figure in our pdf, eighth image is a curve and it looks like it returns to its start point. It is like the seventh image but the way is reversed. In our plot the image is following the same pattern but the end did not return to where it started.

For different instances and classes, we have different location values. Our figures are mostly similar to the reference gestures. There are some small differences, which may be caused by the selected instances. Different instances in the same class may have small effects on our figures.

Part 2 Alternatives

Individual model representation

We will use autoregressive approach in this method. We will use lag2. We will use AR2 model as a function to get the coefficients. We will do this for x, y and z axes. Then we will see the lag values we obtained. Here we have some NA values while calculating lagged values.

# Independent copy of the merged table, ordered by series and time, so the lag
# columns are not added to `merged` itself.
im_merged <- copy(merged)[order(instances, time)]

# For every axis add the location one and two steps back within each series.
# The first rows of a series have no predecessor, so their lags are NA.
im_merged[, c("lag1_x", "lag2_x") := shift(location_x, 1:2), by = instances]
im_merged[, c("lag1_y", "lag2_y") := shift(location_y, 1:2), by = instances]
im_merged[, c("lag1_z", "lag2_z") := shift(location_z, 1:2), by = instances]

# Columns 13 to 18 are the six lag columns just added.
head(im_merged[, 13:18])
##         lag1_x     lag2_x     lag1_y     lag2_y     lag1_z     lag2_z
## 1:          NA         NA         NA         NA         NA         NA
## 2:  -0.7914472         NA  -1.959984         NA -0.2490781         NA
## 3:  -2.3743415 -0.7914472  -5.879951  -1.959984 -0.7472342 -0.2490781
## 4:  -4.7531086 -2.3743415 -11.756519  -5.879951 -1.4968294 -0.7472342
## 5:  -7.9419407 -4.7531086 -19.578838 -11.756519 -2.5054346 -1.4968294
## 6: -11.9800028 -7.9419407 -29.316966 -19.578838 -3.7939431 -2.5054346
# Distinct series ids, in order of first appearance.
series_id <- unique(im_merged[["instances"]])

# Regress location_x on its first and second lag using the rows of `dat`.
# Returns a one-row data frame holding the intercept and the two lag
# coefficients.
fit_ar2_x1 <- function(dat) {
  ar2_formula <- location_x ~ lag1_x + lag2_x
  estimates <- coef(lm(ar2_formula, data = dat))
  data.frame(t(estimates))
}


# Regress location_y on its first and second lag using the rows of `dat`.
# Returns a one-row data frame holding the intercept and the two lag
# coefficients.
fit_ar2_y1 <- function(dat) {
  ar2_formula <- location_y ~ lag1_y + lag2_y
  estimates <- coef(lm(ar2_formula, data = dat))
  data.frame(t(estimates))
}


# Regress location_z on its first and second lag using the rows of `dat`.
# Returns a one-row data frame holding the intercept and the two lag
# coefficients.
fit_ar2_z1 <- function(dat) {
  ar2_formula <- location_z ~ lag1_z + lag2_z
  estimates <- coef(lm(ar2_formula, data = dat))
  data.frame(t(estimates))
}
# Fit one AR(2) model per series and axis; each call returns a one-row data
# frame of coefficients.
fitted_coef_x1 <- lapply(
  series_id,
  function(sid) fit_ar2_x1(im_merged[instances == sid])
)
fitted_coef_y1 <- lapply(
  series_id,
  function(sid) fit_ar2_y1(im_merged[instances == sid])
)
fitted_coef_z1 <- lapply(
  series_id,
  function(sid) fit_ar2_z1(im_merged[instances == sid])
)

# Stack the per-series rows into one table per axis (row order follows series_id).
coef_dt_x1 <- rbindlist(fitted_coef_x1)
coef_dt_y1 <- rbindlist(fitted_coef_y1)
coef_dt_z1 <- rbindlist(fitted_coef_z1)

head(coef_dt_x1)
##    X.Intercept.   lag1_x     lag2_x
## 1:   -1.4727362 1.995253 -0.9957569
## 2:    0.2405608 2.007370 -1.0075834
## 3:    0.2605330 2.003429 -1.0036664
## 4:    0.6527754 1.998588 -0.9987479
## 5:   -0.2526508 2.003568 -1.0041588
## 6:   -1.8341848 1.991675 -0.9919887
head(coef_dt_y1)
##    X.Intercept.   lag1_y     lag2_y
## 1:   -2.2217499 1.990729 -0.9908522
## 2:   -2.1082745 1.993142 -0.9932512
## 3:   -1.7057580 1.994594 -0.9947060
## 4:   -1.8029859 1.991345 -0.9914687
## 5:   -0.9573037 2.001409 -1.0015131
## 6:   -1.7373955 1.996401 -0.9965036
head(coef_dt_z1)
##    X.Intercept.   lag1_z     lag2_z
## 1:   -0.6686476 1.997481 -0.9976545
## 2:   -1.3177676 1.993012 -0.9932039
## 3:   -1.5370083 1.992113 -0.9922937
## 4:   -2.0897039 1.989055 -0.9893641
## 5:   -1.1766335 1.997886 -0.9979903
## 6:   -2.0553587 1.984864 -0.9850670

We need to organize our newly created data by adding the instances column, so that each row of coefficients can be matched to its series and class.

# Tag each coefficient row with the series it was fitted on; the rows were
# produced in series_id order.
coef_dt_x1[, instances := series_id]
coef_dt_y1[, instances := series_id]
coef_dt_z1[, instances := series_id]

# Attach the gesture class of every series. The result is stored back into the
# coefficient tables themselves so the class column is actually available on
# coef_dt_x1 / coef_dt_y1 / coef_dt_z1.
coef_dt_x1 <- merge(coef_dt_x1, train_x[, list(instances, class)], by = "instances")
coef_dt_y1 <- merge(coef_dt_y1, train_y[, list(instances, class)], by = "instances")
coef_dt_z1 <- merge(coef_dt_z1, train_z[, list(instances, class)], by = "instances")
head(coef_dt_x1)
##    instances X.Intercept.   lag1_x     lag2_x class
## 1:         1   -1.4727362 1.995253 -0.9957569     1
## 2:         2    0.2405608 2.007370 -1.0075834     1
## 3:         3    0.2605330 2.003429 -1.0036664     1
## 4:         4    0.6527754 1.998588 -0.9987479     1
## 5:         5   -0.2526508 2.003568 -1.0041588     1
## 6:         6   -1.8341848 1.991675 -0.9919887     1
head(coef_dt_y1)
##    instances X.Intercept.   lag1_y     lag2_y class
## 1:         1   -2.2217499 1.990729 -0.9908522     1
## 2:         2   -2.1082745 1.993142 -0.9932512     1
## 3:         3   -1.7057580 1.994594 -0.9947060     1
## 4:         4   -1.8029859 1.991345 -0.9914687     1
## 5:         5   -0.9573037 2.001409 -1.0015131     1
## 6:         6   -1.7373955 1.996401 -0.9965036     1
head(coef_dt_z1)
##    instances X.Intercept.   lag1_z     lag2_z class
## 1:         1   -0.6686476 1.997481 -0.9976545     1
## 2:         2   -1.3177676 1.993012 -0.9932039     1
## 3:         3   -1.5370083 1.992113 -0.9922937     1
## 4:         4   -2.0897039 1.989055 -0.9893641     1
## 5:         5   -1.1766335 1.997886 -0.9979903     1
## 6:         6   -2.0553587 1.984864 -0.9850670     1
# Scatter of the two AR(2) lag coefficients, one point per series, for each
# axis; points are coloured by the series id.
ggplot(
  data = coef_dt_x1,
  mapping = aes(x = lag1_x, y = lag2_x, colour = instances)
) +
  geom_point(size = 3)

ggplot(
  data = coef_dt_y1,
  mapping = aes(x = lag1_y, y = lag2_y, colour = instances)
) +
  geom_point(size = 3)

ggplot(
  data = coef_dt_z1,
  mapping = aes(x = lag1_z, y = lag2_z, colour = instances)
) +
  geom_point(size = 3)

By plotting our new data set, we see our lag values for x, y and z. Visualization is shown like overlapping because we used 2D scatter plot.

Class based representation

Here we use an autoregressive model with two lags again. We can represent the series by their goodness of fit to class-based models. We first create fit functions for x, y and z. Then we build our model for all axes.

# Distinct gesture classes, in order of first appearance.
class_id <- unique(im_merged[["class"]])

# Regress location_x on its first and second lag using the rows of `dat` and
# return the fitted lm object, so callers can use predict() on it.
fit_ar2_x2 <- function(dat) {
  ar2_formula <- location_x ~ lag1_x + lag2_x
  lm(ar2_formula, data = dat)
}


# Regress location_y on its first and second lag using the rows of `dat` and
# return the fitted lm object, matching fit_ar2_x2, so that the result can be
# passed to predict() when residuals against a class model are needed.
# The coefficients remain available through coef() on the returned model.
fit_ar2_y2 <- function(dat) {
  lm(location_y ~ lag1_y + lag2_y, data = dat)
}


# Regress location_z on its first and second lag using the rows of `dat` and
# return the fitted lm object, matching fit_ar2_x2, so that the result can be
# passed to predict() when residuals against a class model are needed.
# The coefficients remain available through coef() on the returned model.
fit_ar2_z2 <- function(dat) {
  lm(location_z ~ lag1_z + lag2_z, data = dat)
}

# One fit per gesture class and axis, each using all rows of that class.
# The list elements follow the order of class_id.
fitted_model_x2 <- lapply(
  class_id,
  function(cls) fit_ar2_x2(im_merged[class == cls])
)
fitted_model_y2 <- lapply(
  class_id,
  function(cls) fit_ar2_y2(im_merged[class == cls])
)
fitted_model_z2 <- lapply(
  class_id,
  function(cls) fit_ar2_z2(im_merged[class == cls])
)

# Score every observation against each class-level x-axis model: the column
# residual_<class> holds location_x minus that model's prediction.
merged_p <- copy(im_merged)
# seq_along() keeps the loop from running when class_id is empty.
for (i in seq_along(class_id)) {
  residual_col <- paste0("residual_", class_id[i])
  class_model <- fitted_model_x2[[i]]
  merged_p[, (residual_col) := location_x - predict(class_model, merged_p)]
}

# Show the residual columns by name rather than by fixed column positions.
head(merged_p[, paste0("residual_", class_id), with = FALSE])
##    residual_1 residual_2  residual_3 residual_4 residual_5 residual_6
## 1:         NA         NA          NA         NA         NA         NA
## 2:         NA         NA          NA         NA         NA         NA
## 3: -0.7075486 -0.7554433  0.05651642  -1.679179 -0.8617372 -0.8322720
## 4: -0.7187654 -0.7678185  0.04366433  -1.691928 -0.8706960 -0.8418483
## 5: -0.7549653 -0.8052237  0.00579223  -1.729695 -0.9045849 -0.8763617
## 6: -0.8061491 -0.8576948 -0.04715158  -1.782529 -0.9533345 -0.9257625
##    residual_7 residual_8
## 1:         NA         NA
## 2:         NA         NA
## 3: -0.6915450 -0.8944311
## 4: -0.7033725 -0.9061283
## 5: -0.7401947 -0.9428346
## 6: -0.7920315 -0.9945649

We can see the residual values. The first two values of each series are NA because the model uses two lags. Next we calculate, for every series, the mean residual under each of the 8 class models.

# Mean residual of every series under each class model. The residual_<k>
# columns are found by name and summarised in one pass, so the code does not
# depend on the number of classes; the result columns are named m<k>.
residual_cols <- grep("^residual_", names(merged_p), value = TRUE)
residual_stats <- merged_p[,
  lapply(.SD, mean, na.rm = TRUE),
  by = list(instances, class),
  .SDcols = residual_cols
]
setnames(residual_stats, residual_cols, sub("^residual_", "m", residual_cols))
head(residual_stats)
##    instances class         m1          m2        m3         m4          m5
## 1:         1     1 -0.1273735 -0.27972298 0.5989884 -1.1374189 -0.22129609
## 2:         2     1  0.1460439  0.21545694 0.9957823 -0.7430690 -0.13241443
## 3:         3     1  0.1338245  0.15307239 0.9440284 -0.7932087 -0.08770754
## 4:         4     1  0.3057924  0.42674383 1.1616495 -0.5757106  0.01390236
## 5:         5     1  0.0256679 -0.02508071 0.8011771 -0.9356764 -0.14316481
## 6:         6     1 -0.2790397 -0.53491095 0.3950943 -1.3406620 -0.29382613
##             m6          m7         m8
## 1: -0.18056743 -0.11711695 -0.3815349
## 2: -0.08917091  0.18797556  0.0252637
## 3: -0.05212328  0.16336624 -0.0145912
## 4:  0.04397429  0.34494906  0.2209322
## 5: -0.10578378  0.04712071 -0.1657747
## 6: -0.25099870 -0.28115791 -0.5966362
# Long format: one row per (series, class model) with its mean residual.
residual_stats <- melt(residual_stats, id.vars = c("instances", "class"))

# Box plot of the mean residuals per class model, one panel per true class.
ggplot(
  data = residual_stats,
  mapping = aes(x = variable, y = value, colour = variable)
) +
  geom_boxplot() +
  facet_wrap(facets = ~class)

As we see from the box plots, the classes have different residual distributions and different means. We can use this information to separate the classes and analyze them individually.

Conclusion

From part 2, we can compare the two methods we used for representation. In individual model representation we used autoregressive model with lag2 and we have some overlaps in scatter plot. In class based representation we also used autoregressive model with lag2. But different from individual model representation, we used mean calculation approach in class based representation method. The box plot seems more reasonable representation than scatter plot. We can learn more information from analyzing the box plot. So using class based representation will be more helpful to us in analyzing the data and the classes. We also had great visualizations by using class-instance columns method.